# Direct outputs not eligible for public release

# GLOBAL SETTINGS --------------------------------------------------------------

# Session-wide printing and diagnostics settings.
options(
    # Diagnostics: print each warning as it occurs and report source
    # locations for errors.
    warn = 1,
    show.error.locations = TRUE,
    # Printing: strongly penalise scientific notation, print up to 16
    # significant digits, and lift the cap on the number of printed entries.
    scipen = 999,
    digits = 16,
    max.print = .Machine$integer.max
)

# Main seed for the script. Selecting the generator through `kind` switches
# the RNG to L'Ecuyer-CMRG and seeds it in a single step.
seed <- 818675309L
set.seed(seed, kind = "L'Ecuyer-CMRG")

# PACKAGES ---------------------------------------------------------------------
library(data.table)
library(Matrix)
library(zoo)
library(multiwayvcov)
library(lmtest)
library(checkmate)
library(futile.logger)
library(lfe)

library(remotes)
# Install eventStudy from GitHub only when it is not already installed, so
# that routine runs neither require network access nor silently pick up a
# newer upstream commit between runs.
if (!requireNamespace("eventStudy", quietly = TRUE)) {
    remotes::install_github("setzler/eventStudy/eventStudy")
}
library(eventStudy) # for DiD-IV

# PACKAGE SETTINGS -------------------------------------------------------------

# data.table
# Run data.table on a single thread.
setDTthreads(1L)
# When a data.table is printed, also show each column's class in the header
# and the table's keys.
options(datatable.print.keys = TRUE, datatable.print.class = TRUE)

# BEGIN FILE -------------------------------------------------------------------

# Load project helper code. With local = TRUE the file is evaluated in the
# environment that source() is called from.
# NOTE(review): presumably this file defines Wald_ES2(), which is called below
# but not defined in this script -- confirm.
source("~/code/0-utility-functions/wald_es.R", local = TRUE)
# Name of the outcome column: it selects the column that is zero-filled below,
# is passed to Wald_ES2() as `outcomevar`, and is embedded in the output file
# name.
outcome <- "db_w2_wages"

# Read in lottery panel data
# NOTE(review): the `:=` syntax used below assumes this object is a
# data.table -- confirm.
lottery_panel <- readRDS("~/population-panel-data/lottery_panel_data.rds")

# For all outcomes, replace with zero if missing.
# Fail fast when the requested outcome column is absent instead of silently
# skipping the fill and only failing later inside the regression.
if (!outcome %in% names(lottery_panel)) {
    stop(
        sprintf("Outcome column '%s' not found in lottery_panel.", outcome),
        call. = FALSE
    )
}
# is.numeric() is TRUE for integer and double columns and always returns a
# single value, whereas comparing class() with `==` yields one value per class
# and makes `||` fail (R >= 4.3) for columns that carry several classes.
if (is.numeric(lottery_panel[[outcome]])) {
    # set() addresses the column by name, so the lookup cannot be shadowed by
    # a panel column that happens to be called "outcome".
    set(
        lottery_panel,
        i = which(is.na(lottery_panel[[outcome]])),
        j = outcome,
        value = 0
    )
}

# Flag rows with age in the closed interval [21, 64] (between() includes both
# bounds by default and already returns a logical vector). Treated cohorts are
# restricted to flagged units in year 0, and only control units flagged in the
# same calendar year are used.
lottery_panel[, age_case := between(age, lower = 21, upper = 64)]

# Run stacked event-study regression.
# NOTE(review): Wald_ES2() is not defined in this script (presumably it comes
# from the sourced wald_es.R), so the groupings below follow the argument
# names only -- confirm semantics against its definition. All arguments are
# passed by name, so their order does not affect the call.
did_results <- Wald_ES2(
    # Data (a copy, so the call cannot modify lottery_panel by reference)
    # and the outcome column.
    long_data = copy(lottery_panel),
    outcomevar = outcome,
    # Panel structure and clustering.
    unit_var = "tin",
    cal_time_var = "tax_yr",
    onset_time_var = "win_yr",
    cluster_vars = "tin",
    # Event-time settings.
    omitted_event_time = -2,
    anticipation = 0,
    heterogeneous_only = TRUE,
    # Covariates.
    discrete_covars = "age",
    # Sample restrictions: age_case at event time 0 for treated and control.
    treated_subset_var = "age_case",
    treated_subset_event_time = 0,
    control_subset_var = "age_case",
    control_subset_event_time = 0,
    # Inverse-probability-weighting options.
    ipw = TRUE,
    ipw_model = "linear",
    ipw_composition_change = FALSE,
    ipw_keep_data = TRUE,
    ipw_ps_lower_bound = 0,
    ipw_ps_upper_bound = 1
)

# Persist the estimates; the outcome name is embedded in the file name.
saveRDS(
    object = did_results,
    file = sprintf(
        "~/estimation-output/event_study_estimates_%s_ipw.rds",
        outcome
    )
)
